
cd ""
set more off
use data_survey_merged, clear

* Recontact delay: number of days between the wave-1 and the wave-2 completion date.
* Only the first 10 characters of each timestamp string are kept and parsed as month-day-year.
generate end = substr(end_date, 1, 10)
generate end_d = date(end, "MDY")
generate w2_end = substr(w2_end_date, 1, 10)
generate w2_end_d = date(w2_end, "MDY")
generate t_recontact = w2_end_d - end_d
summarize t_recontact, detail

*** Profession restricted to the relevant categories:
*** codes 1 and 5 (marginal in the sample) are set to missing
generate prof_relev = profession if !inlist(profession, 1, 5)

*** Highest achieved education restricted to the relevant categories:
*** code 1 (marginal in the sample) is set to missing
generate edu_relev = highest_edu if highest_edu != 1


* w1w2_ variables are the average of the wave-1 and the wave-2 value.
* When either value is missing the wave-1 value is used on its own, so respondents
* without wave 2 stay in the sample (no new information, but the observation is kept).
local averaged extrav consci neurot agree open SOEP_e SOEP_a SOEP_c SOEP_n SOEP_o  GPS* cognitive cog_* GPA_pct reliable length_m sure_personality
foreach v of varlist `averaged' {
	generate w1w2_`v' = cond(missing(`v', w2_`v'), `v', (`v' + w2_`v') / 2)
}


	
* Indicators for high self-reported reliability in wave 1 and wave 2 (answer above 9,
* evaluated before the recode to 0-10 further down) and for being aged 21 or younger.
* Stata treats a missing value as larger than any number, so without the !missing()
* qualifier a missing reliability answer would be coded 1 by the > test and a missing
* age would be coded 0 by the <= test. The indicators are left missing in those cases.
gen rel_high=(reliable>9) if !missing(reliable)
gen w2_rel_high=(w2_reliable>9) if !missing(w2_reliable)
gen young=(age<=21) if !missing(age)

* individuals who self-report to provide reliable responses overall:
* for wave2==1 the value is 0 if the indicator is 0 in either wave and 1 if it is 1 in both;
* whatever is still missing after that takes the wave-1 indicator
gen w1w2_rel_high=0 if (rel_high==0 | w2_rel_high==0) & wave2==1
replace w1w2_rel_high=1 if rel_high==1 & w2_rel_high==1 & wave2==1
replace w1w2_rel_high=rel_high if w1w2_rel_high==.

* recode self-reported overall reliability to be 0-10 for reporting (instead of 1-11)
foreach var of varlist reliable {
	replace `var'=`var'-1
	replace w2_`var'=w2_`var'-1
	replace w1w2_`var'=w1w2_`var'-1
}

rename (sure_personality w2_sure_personality w1w2_sure_personality) (sure_pers w2_sure_pers w1w2_sure_pers)

**** as above, but based on the question asked after finishing the BFI items: "I am sure that my answers to these questions describe me accurately"
* NOTE(review): an equality test is 0 for a missing answer, so a missing sure_pers is coded 0 here - confirm this is intended
gen sure_high=(sure_pers==5)
* the wave-2 indicator is only defined when the wave-2 item w2_BFI2_1 is non-missing
gen w2_sure_high=(w2_sure_pers==5) if w2_BFI2_1!=.

* combined indicator, same rule as for w1w2_rel_high but conditioning on w2_BFI2_1 being observed
gen w1w2_sure_high=0 if (sure_high==0 | w2_sure_high==0) & w2_BFI2_1!=.
replace w1w2_sure_high=1 if sure_high==1 & w2_sure_high==1 & w2_BFI2_1!=.
replace w1w2_sure_high=sure_high if w1w2_sure_high==.


	



**** Extra-incentive categories: 0 = extra_inc is 0, 1 = extra_inc is 1,
**** 2 = extra_inc is 1 and w2_extra_inc_high is 1; missing otherwise
generate w1w2_extra_inc_cats = cond(extra_inc == 1, cond(w2_extra_inc_high == 1, 2, 1), cond(extra_inc == 0, 0, .))


* Splits individuals by recontact time: 1 = at most 35, 2 = more than 35.
* t_recontact is a difference of two daily dates, i.e. measured in days (35 days = 5 weeks).
generate t_recontact_cat2 = cond(t_recontact <= 35, 1, 2) if t_recontact != .

* squared survey length
generate w1w2_length2 = w1w2_length_m * w1w2_length_m

* indicator for answering both waves
generate both_waves = (wave2 == 1)
* has meaningful observations in both waves (wave-2 item w2_BFI2_1 is observed)
generate both_waves_ok = w2_BFI2_1 != .






* getting outcomes
* unemployed (profession code 4) as opposed to employed or in college (codes 2 and 3);
* missing for every other value of prof_relev
gen unemp=1 if prof_relev==4
replace unemp=0 if inlist(prof_relev,2,3)

* having already graduated from college - has an age gradient but can control for that
* The !missing() qualifier matters: Stata treats missing as larger than any number, so an
* unguarded (highest_edu>=3) would code respondents without an education answer as graduates.
gen college_grad=(highest_edu>=3) if !missing(highest_edu)
* education code 5 is excluded from the outcome
replace college_grad=. if highest_edu==5


	
sort CID

* includes observations identified as low quality by the survey provider
save data_all_vars_outliers, replace


* excludes observations identified as low quality by the survey provider:
* wave-1 outliers are deleted entirely, wave-2 outliers only have their w2_ variables blanked
* (so that the wave-1 observation is preserved if that one is OK for a given individual but wave 2 is not)
drop if dynata_outliers==1
foreach var of varlist w2_* {
	* Dispatch on the storage type explicitly. The replace commands are no longer wrapped
	* in capture, so a genuine problem (for example a missing dynata_outliers_w2 flag)
	* stops the script instead of silently leaving the wave-2 outliers in the data.
	capture confirm numeric variable `var'
	if _rc==0 {
		replace `var'=. if dynata_outliers_w2==1
	}
	else {
		* NOTE(review): string variables receive a literal period, which is not the
		* empty string that Stata treats as string missing - kept as in the original
		replace `var'="." if dynata_outliers_w2==1
	}
}
	

*** Rushing dummies (base and robustness): for each wave and for the 5th, 10th, 15th and
*** 20th percentile of survey length, 1 = below that percentile, 0 = at or above it,
*** missing when the survey length is missing
foreach con in none w2_ {
	* the token none stands for wave 1, whose variables carry no prefix
	local pre
	if "`con'" != "none" {
		local pre `con'
	}
	local duration `pre'length_m
	forvalues dec = 5(5)20 {
		local dummy `pre'd_pct`dec'_length_m
		capture drop `dummy'
		* with nq(100) the stored result r(r#) is the #th percentile
		_pctile `duration', nq(100)
		generate `dummy' = (`duration' < `r(r`dec')') if `duration' != .
	}
}

* Collapses the two waves into one 0-1 dummy per criterion:
* 0 = below the rushing criterion in neither observed wave, 1 = below it in at least one wave
foreach d of varlist d_pct* {
	egen av1_`d' = rowmean(`d' w2_`d')
	* the row mean of 0-1 dummies is positive exactly when at least one of them is 1
	replace av1_`d' = (av1_`d' > 0) if av1_`d' != .
}
drop d_pct* w2_d_pct*


* standardizing (mean 0, sd 1) for comparability of coefficients and ease of comparison;
* the wave-1, the wave-2 and, where it exists, the w1w2_ version are each standardized
* with their own mean and standard deviation
foreach var of varlist extrav consci neurot agree open SOEP_e SOEP_a SOEP_c SOEP_n SOEP_o LS_gen SWLS GPS* cognitive  q_mood_beginning_1 q_mood_end_1 cog_*   {
	sum `var'
	replace `var'=(`var'-`r(mean)')/`r(sd)' if `var'!=.
	sum w2_`var'
	replace w2_`var'=(w2_`var'-`r(mean)')/`r(sd)' if w2_`var'!=.
	* Not every variable in this list has a w1w2_ average. Test for it explicitly
	* instead of wrapping sum and replace in capture, which would also hide real errors.
	capture confirm numeric variable w1w2_`var'
	if _rc==0 {
		sum w1w2_`var'
		replace w1w2_`var'=(w1w2_`var'-`r(mean)')/`r(sd)' if w1w2_`var'!=.
	}
}

*** normalize the reliability variables
*** keep the non-normalized variables as weight_ plus the variable name
foreach var of varlist w1w2_sure_pers w1w2_reliable reliable sure_pers w2_reliable w2_sure_pers {
	gen weight_`var'=`var'
	sum `var'
	replace `var'=(`var'-`r(mean)')/`r(sd)' if `var'!=.
}



save data_all_vars, replace




	

******************************* Exhibits Start ************************************************************************************

**************** Below Generates Table A.7: test-retest correlation of every single item

* items entering the table; row r of both output columns refers to the r-th item of this list
local single_items BFI2_1-SOEP_extra6  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen section4*  cog_*

* column holding the item names
capture drop tr_corr_names_single
generate tr_corr_names_single = ""
local row = 0
foreach item of varlist `single_items' {
	local ++row
	replace tr_corr_names_single = "`item'" in `row'
}

* column holding the correlation between the wave-1 item and its w2_ counterpart
capture drop tr_corr_single
generate tr_corr_single = .
local row = 0
foreach item of varlist `single_items' {
	local ++row
	correlate `item' w2_`item'
	replace tr_corr_single = `r(rho)' in `row'
	* progress output: index of the row that will be filled next
	display `row' + 1
}

**************** Inputs for Table A.7
browse tr_corr_names_single tr_corr_single


**************** Below Generates Tables, which use facets of personality traits for test-retest correlations: Table 1, A.6

* measures entering the tables; row r of every output column refers to the r-th measure of this list
local facet_vars extrav sociability assertiveness energy   consci  organization productiveness responsibility neurot anxiety depression em_volatily agree compassion respectfulness trust  open curiosity aesthetic_sense imagination  SOEP_e SOEP_c   SOEP_n SOEP_a SOEP_o  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other  LS_gen SWLS cog_*

* column holding the measure names
capture drop tr_corr_names
generate tr_corr_names = ""
local row = 0
foreach v of varlist `facet_vars' {
	local ++row
	replace tr_corr_names = "`v'" in `row'
}

* tr_corr      : test-retest correlations in the full sample
* tr_corr_sure : the same correlations restricted to w1w2_sure_high equal to 1
foreach out in tr_corr tr_corr_sure {
	local subset
	if "`out'" == "tr_corr_sure" {
		local subset if w1w2_sure_high==1
	}
	capture drop `out'
	generate `out' = .
	local row = 0
	foreach v of varlist `facet_vars' {
		local ++row
		correlate `v' w2_`v' `subset'
		replace `out' = `r(rho)' in `row'
		* progress output: index of the row that will be filled next
		display `row' + 1
	}
}

* general test-retest correlations by country
* For every level of the grouping variable a column tr_ + variable name + k is created, where
* k = 1, 2, ... numbers the observed non-missing levels in ascending order (for a 0-1 variable
* k = 1 is the value 0 and k = 2 the value 1). Row r of the column holds the correlation for
* the r-th measure of the list, matching the rows of tr_corr_names.
* The levels are taken from levelsof, so the codes do not have to be consecutive or start at
* 0 or 1. NOTE(review): levels are matched with ==, which assumes integer-valued codes.
foreach cat of varlist cntry{
		tab `cat'
		levelsof `cat', local(levels)
		local k=0
		foreach lev of local levels {
			local ++k
			local j=1
			capture drop tr_`cat'`k'
			gen tr_`cat'`k'=.
			* with facets
			foreach var of varlist extrav sociability assertiveness energy   consci  organization productiveness responsibility neurot anxiety depression em_volatily agree compassion respectfulness trust  open curiosity aesthetic_sense imagination  SOEP_e SOEP_c   SOEP_n SOEP_a SOEP_o  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen SWLS  cog_* {
					corr `var' w2_`var' if `cat'==`lev'
					replace tr_`cat'`k'=`r(rho)' in `j'
					local ++j
			}
		}
	}

**************** Inputs for Table 1
br 	tr_corr_names tr_corr tr_cntry* 
**************** Inputs for Table A.6
br 	tr_corr_names tr_corr tr_corr_sure



**************** Below Generates Tables, which use full personality traits for test-retest correlations: Table 2, A.9

* measures entering the tables; row r of every output column refers to the r-th measure of this list
local trait_vars extrav consci neurot agree open SOEP_e SOEP_c   SOEP_n SOEP_a SOEP_o  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen SWLS  cog_*

* column holding the measure names
capture drop tr_corr_names
generate tr_corr_names = ""
local row = 0
foreach v of varlist `trait_vars' {
	local ++row
	replace tr_corr_names = "`v'" in `row'
}


* general test-retest correlations by reliability (BFI and overall)
* One column per grouping variable and category, named tr_ + grouping variable + k.
* For a 0-1 grouping variable k = 1 is the value 0 and k = 2 the value 1.
foreach cat of varlist w1w2_sure_high w1w2_rel_high  sure_high rel_high {
	* number of categories and smallest code of the grouping variable
	tabulate `cat'
	local ncat = r(r)
	summarize `cat'
	local lowest = r(min)
	forvalues k = 1/`ncat' {
		* code of the grouping variable that belongs to column k
		local level
		if `lowest' == 0 {
			local level = `k' - 1
		}
		if `lowest' == 1 {
			local level = `k'
		}
		capture drop tr_`cat'`k'
		capture generate tr_`cat'`k' = .
		local row = 0
		foreach v of varlist `trait_vars' {
			local ++row
			correlate `v' w2_`v' if `cat' == `level'
			replace tr_`cat'`k' = `r(rho)' in `row'
		}
	}
}

**************** Inputs for Table 2
order tr_corr_names tr_w1w2_sure_high2	tr_w1w2_sure_high1	tr_w1w2_rel_high2	tr_w1w2_rel_high1, last
browse tr_corr_names tr_w1w2*

* general test-retest correlations by reliability (BFI and overall), excluding individuals who rushed the survey
* (only observations with av1_d_pct5_length_m equal to 0 are used)
* One column per grouping variable and category, named tr_ + grouping variable + k.
* For a 0-1 grouping variable k = 1 is the value 0 and k = 2 the value 1.
* The row after the last correlation holds the number of observations of the last correlation.
drop tr_w1w2*
local trait_vars extrav consci neurot agree open SOEP_e SOEP_c   SOEP_n SOEP_a SOEP_o  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen SWLS  cog_*
foreach cat of varlist w1w2_sure_high w1w2_rel_high  {
	* number of categories and smallest code of the grouping variable
	tabulate `cat'
	local ncat = r(r)
	summarize `cat'
	local lowest = r(min)
	forvalues k = 1/`ncat' {
		* code of the grouping variable that belongs to column k
		local level
		if `lowest' == 0 {
			local level = `k' - 1
		}
		if `lowest' == 1 {
			local level = `k'
		}
		capture drop tr_`cat'`k'
		capture generate tr_`cat'`k' = .
		local row = 0
		foreach v of varlist `trait_vars' {
			local ++row
			correlate `v' w2_`v' if `cat' == `level' & av1_d_pct5_length_m==0
			replace tr_`cat'`k' = `r(rho)' in `row'
		}
		* sample size of the last correlation, stored one row below the correlations
		local nrow = `row' + 1
		replace tr_`cat'`k' = `r(N)' in `nrow'
	}
}

**************** Inputs for Table A.9
order tr_corr_names tr_w1w2_sure_high2	tr_w1w2_sure_high1	tr_w1w2_rel_high2	tr_w1w2_rel_high1 tr_sure_high2	tr_sure_high1	tr_rel_high2	tr_rel_high1, last
browse tr_corr_names tr_w1w2_sure_high2	tr_w1w2_sure_high1	tr_w1w2_rel_high2	tr_w1w2_rel_high1 tr_sure_high2	tr_sure_high1	tr_rel_high2	tr_rel_high1

	
	
	
	
	
	
*** Inputs for Table A.5. Looks at whether various characteristics are significant predictors of
*** test-retest correlation strength (have a statistically significant interaction term, at 5%).
*** For every characteristic one file tr_ + characteristic + .xls is written with one regression per measure.
foreach cat of varlist male young t_recontact_cat2 pc  w1w2_extra_inc_cats BFI_first GPA_first      {
	* the first regression of a characteristic starts a fresh file, the following ones are appended
	local mode replace
	foreach var of varlist extrav    consci   neurot  agree  open  SOEP_e SOEP_c   SOEP_n SOEP_a SOEP_o  GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec 	GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen SWLS cognitive{
		* temporary copies with fixed names, so the regressors are called the same in every regression
		generate w2_dep = w2_`var'
		generate second = `cat'
		* wave-1 measure on the wave-2 measure, the characteristic and their interaction
		regress `var' c.w2_dep##second
		outreg2 using tr_`cat'.xls, `mode' dec(2) noomitted
		local mode append
		drop w2_dep second
	}
}

	


	
		
************** Below generates Figure 2
*** breakdown by percentile: 5-20-40-60-80-100
* For each timing variable (k = 1 for length_m, k = 2 for w2_length_m) and each trait, the
* variable c_t_ + trait + k holds the test-retest correlation within six bands of survey time:
*   row 1: at or below the 5th percentile (rushed)
*   row 2: above the 5th up to the 20th percentile
*   rows 3-5: above the 20th up to the 40th, 40th up to 60th, 60th up to 80th percentile
*   row 6: above the 80th percentile
* Percentiles are taken over observations with a non-missing w2_extrav.
local k=1
foreach time of varlist length_m w2_length_m {
	* The cut-offs depend only on the timing variable, so they are computed once here and
	* kept in locals c1-c5 (corr overwrites r(), so they cannot be read from r() later on).
	_pctile `time' if w2_extrav!=., percentiles(5 20 40 60 80)
	forvalues q=1/5 {
		local c`q'=r(r`q')
	}
	foreach var of varlist  extrav consci neurot agree open  {
			* drop any earlier version so stale values cannot survive a re-run
			* (the former drop command carried a trailing =. and therefore never executed)
			capture drop c_t_`var'`k'
			gen c_t_`var'`k'=.
			* test-retest correlations for those who rushed (at or below the 5th percentile in survey time distribution)
			corr `var' w2_`var' if `time'<=`c1'
			replace  c_t_`var'`k'=`r(rho)' in 1
			* bands between consecutive cut-offs: 5-20, 20-40, 40-60 and 60-80
			forvalues b=2/5 {
				local lo=`b'-1
				corr `var' w2_`var' if (`time'>`c`lo'') & (`time'<=`c`b'')
				replace  c_t_`var'`k'=`r(rho)' in `b'
			}
			* test-retest correlations for last quintile in survey time distribution;
			* missing survey times compare as larger than any number and are excluded explicitly
			corr `var' w2_`var' if `time'>`c5' & !missing(`time')
			replace  c_t_`var'`k'=`r(rho)' in 6
		}
	local ++k
	}

* average of the two versions (bands defined by wave-1 time and by wave-2 time) for each trait
foreach var in  extrav consci neurot agree open  {
	egen av_c_t_`var'=rowmean(c_t_`var'1 c_t_`var'2)
}
**************** Inputs for Figure 2
br av_c*